# PoP - Policing Socio-Geographic Boundaries and Inequality
# script for creating final dataset for Seattle.

#### libraries ####

library(MASS)
library(sp)
library(stargazer)
library(sf)
library(matrixStats)
library(tidyverse)
library(scales)
library(tidycensus)
library(gridExtra)
library(spdep)
library(grid)
library(ggplot2)
library(lattice)

# set working directory
#### downloading census data #### 

# Download Decennial Census data 2010

census_api_key(key = 'db6400c6d9d5b405e0f7f1e8872c3b8299afa6fb', install = TRUE)

vars_census <- c("P001001", "P012006", "P012007", "P012008", "P012009", "P012010",
                 "P012011", "P012012", "P005003", "P005004", "P005006", "P005010",
                 "P015001", "P005005", "P005007", "P005008", "P005009")

sea_sf1 <- tidycensus::get_decennial(geography = "block", state = "WA",
                                     county = "King County", variables = vars_census,
                                     year = 2010, output = "wide", geometry = TRUE)

# Download ACS data 2011-2016
# mhhi
# % poverty
# % unemployed
# college


vars_acs <- c("C24010_004E", "C24010_040E", "C24010_001E", "B23025_005E", "B23025_003E",
              "B17021_001E", "B17021_002E", "B15002_011E", "B15002_028E", "B15002_001E",
              "B11003_016E", "B11001_001E", "B25001_001E", "B25003_003E", "B25002_002E",
              "B25002_003E", "B16004_001E", "B16004_006E", "B16004_007E", "B16004_008E",
              "B16004_011E", "B16004_012E", "B16004_013E", "B16004_016E", "B16004_017E",
              "B16004_018E", "B16004_021E", "B16004_022E", "B16004_023E", "B16004_028E",
              "B16004_029E", "B16004_030E", "B16004_033E", "B16004_034E", "B16004_035E",
              "B16004_038E", "B16004_039E", "B16004_040E", "B16004_043E", "B16004_044E",
              "B16004_045E", "B16004_050E", "B16004_051E", "B16004_052E", "B16004_055E",
              "B16004_056E", "B16004_057E", "B16004_060E", "B16004_061E", "B16004_062E",
              "B16004_065E", "B16004_066E", "B16004_067E", "B16004_008E", "B16004_013E",
              "B16004_018E", "B16004_023E", "B16004_030E", "B16004_035E", "B16004_040E",
              "B16004_045E", "B16004_052E", "B16004_057E", "B16004_062E", "B16004_067E",
              "B16004_001E", "B25038_004E", "B25038_011E", "B25038_004E", "B25038_011E",
              "B19013_001", # mhhi 
              "B15003_001", # education universe 
              "B15003_022", "B15003_023", "B15003_024", "B15003_025", # number of college educated or more
              "B25003_001", # homeowner universe
              "B25003_002") # homeowner

vars_loaded = load_variables(dataset = "acs5", year = 2013)
# View(vars_loaded)

sea_acs <- tidycensus::get_acs(geography = "block group", state = "WA",
                               county = "King County", variables = vars_acs,
                               year = 2013, output = "wide", geometry = TRUE)

# herfindahl or fractionalization index

f_index <- function(x) 1 - rowSums(x^2)

# factor analysis

fac <- function(x, factors, rotation = "varimax", scores = "regression") {
  ok <- complete.cases(x)
  fa <- factanal(x[ok,], factors, rotation=rotation, scores=scores)
  score <- rep(NA, fa$n.obs + sum(!ok))
  score[ok] <- fa$scores
  return(score)
}


# recode Decennial Census

sea_sf1 <- sea_sf1 %>%
  transmute(
    BLK_CODE = GEOID,
    pop = P001001,
    hh = P015001,
    race_white = P005003,
    race_black = P005004,
    race_asian = P005006,
    race_hisp = P005010,
    race_other = P005005 + P005007 + P005008 + P005009,
    p_race_white = race_white / pop,
    p_race_black = race_black / pop,
    p_race_asian = race_asian / pop,
    p_race_hisp = race_hisp / pop,
    p_race_other = race_other / pop,
    age_15_35_male = P012006 + P012007 + P012008 + P012009 + P012010 +
      P012011 + P012012,
    hhi = f_index(cbind(p_race_white, p_race_black, p_race_asian,
                        p_race_hisp, p_race_other)),
    geometry
  )

sea_sf1$GEOID = substring(sea_sf1$BLK_CODE, 1, 12)

# recode ACS

sea_acs <- sea_acs %>%
  transmute(
    BG_CODE = GEOID,
    occ_management = C24010_004E + C24010_040E,
    occ_universe = C24010_001E,
    emp_unemployed = B23025_005E,
    emp_civ_lab_force = B23025_003E,
    poverty_universe = B17021_001E,
    poverty = B17021_002E,
    edu_hs = B15002_011E + B15002_028E,
    edu_universe = B15002_001E,
    hh_single_mother = B11003_016E,
    mhhi = B19013_001E, 
    hh = B11001_001E,
    hu = B25001_001E,
    hu_occupied_renter = B25003_003E,
    hu_occupied = B25002_002E,
    hu_vacant = B25002_003E,
    lang_universe = B16004_001E,
    lang_eng_limited = # Speak Eng less than "very well"
      B16004_006E + B16004_007E + B16004_008E +
      B16004_011E + B16004_012E + B16004_013E +
      B16004_016E + B16004_017E + B16004_018E +
      B16004_021E + B16004_022E + B16004_023E +
      B16004_028E + B16004_029E + B16004_030E +
      B16004_033E + B16004_034E + B16004_035E +
      B16004_038E + B16004_039E + B16004_040E +
      B16004_043E + B16004_044E + B16004_045E +
      B16004_050E + B16004_051E + B16004_052E +
      B16004_055E + B16004_056E + B16004_057E +
      B16004_060E + B16004_061E + B16004_062E +
      B16004_065E + B16004_066E + B16004_067E,
    lang_eng_no = B16004_008E + B16004_013E + B16004_018E + B16004_023E +
      B16004_030E + B16004_035E + B16004_040E + B16004_045E +
      B16004_052E + B16004_057E + B16004_062E + B16004_067E,
    lang_universe = B16004_001E,
    housing_moved_2000_2009 = B25038_004E + B25038_011E,
    housing_moved_2000_2009 = B25038_004E + B25038_011E,
    p_occ_management = occ_management / occ_universe,
    p_emp_unemployed = emp_unemployed / emp_civ_lab_force,
    p_poverty = poverty / poverty_universe,
    p_edu_hs = edu_hs / edu_universe,
    p_single_mother = hh_single_mother / hh,
    p_hu_occupied_renter = hu_occupied_renter / hu_occupied,
    p_hu_vacant = hu_vacant / hu,
    p_lang_eng_limited = lang_eng_limited / lang_universe,
    p_lang_eng_no = lang_eng_no / lang_universe,
    pcol = (B15003_022E + B15003_023E + B15003_024E + B15003_025E) / B15003_001E,
    pown = B25003_002E / B25003_001E,
    p_housing_moved_2000_2009 = housing_moved_2000_2009 / hu_occupied,
    # concentrated disadvantage, residential instability and immigrant concentration
    con_disadv = fac(cbind(p_occ_management, p_emp_unemployed, p_poverty,
                           p_edu_hs, p_single_mother), factors=1),
    res_instab = fac(cbind(p_hu_occupied_renter, p_housing_moved_2000_2009,
                           p_hu_vacant), factors=1),
    immi_con = rowMeans(cbind(p_lang_eng_limited, p_lang_eng_no))
  ) %>% dplyr:: select(BG_CODE, con_disadv, res_instab, immi_con, mhhi, pcol, pown, p_poverty,
         p_emp_unemployed)

# join decennial census with ACS data

sea_sf1$BG_CODE = substring(sea_sf1$BLK_CODE, 1, 12)
sea_bg <- sea_sf1 %>% left_join((sea_acs %>% mutate(geometry = NULL) %>% as.data.frame), by = "BG_CODE")

# merg with boundary data
# load shp file
sea_shp = read_sf("Municipal_Boundaries.shp") %>% 
  filter(CITYNAME == "Seattle") %>% dplyr::select(geometry)

# set crs
sea_shp = st_transform(sea_shp, st_crs(sea_bg))
st_crs(sea_shp) = st_crs(sea_bg)

# check whether geometry is valid
sea_bg = st_make_valid(sea_bg, dist = 0)
sea_shp = st_make_valid(sea_shp, dist = 0)
# find intersection
sea_bg = st_intersection(sea_shp, sea_bg)

# wombling, generating difference measure

sea_bg = st_make_valid(sea_bg, dist=0)

uq_bgs = unique(sea_bg$BLK_CODE)

# construct neighbors list from polygon list based on areas sharing more than one boundary pt
test = poly2nb(sea_bg)
# create bins to save boundary values
sea_bg$p_race_black_blv = NA
sea_bg$p_race_white_blv = NA
sea_bg$p_race_hisp_blv = NA
sea_bg$p_race_asian_blv = NA

# loop to generate boundary measure

for (i in 1:length(uq_bgs)) {
  
  print(paste0("Iteration ", i))
  
  sea_bg$p_race_black_blv[i] = 
    max(abs(sea_bg[sea_bg$BLK_CODE %in% uq_bgs[i], ]$p_race_black - 
              sea_bg[test[[i]], ]$p_race_black), na.rm = TRUE)
  sea_bg$p_race_white_blv[i] = 
    max(abs(sea_bg[sea_bg$BLK_CODE %in% uq_bgs[i], ]$p_race_white - 
              sea_bg[test[[i]], ]$p_race_white), na.rm = TRUE)
  sea_bg$p_race_hisp_blv[i] = 
    max(abs(sea_bg[sea_bg$BLK_CODE %in% uq_bgs[i], ]$p_race_hisp - 
              sea_bg[test[[i]], ]$p_race_hisp), na.rm = TRUE)
  sea_bg$p_race_asian_blv[i] = 
    max(abs(sea_bg[sea_bg$BLK_CODE %in% uq_bgs[i], ]$p_race_asian - 
              sea_bg[test[[i]], ]$p_race_asian), na.rm = TRUE)
  
}


# read in categorized 311 data
sea_311_cat <- read_csv("Seattle_311_clean.csv")

# format date
sea_311_cat<-sea_311_cat %>% separate(`Created Date`,sep=' ',into=c("date","time"))
sea_311_cat$date<- as.Date(sea_311_cat$date)

# filter to 2012 onward since arrests are 2011-2015
# but first format date column
sea_311 <- sea_311_cat %>% filter(date > "2011-12-31")


# locate 311 calls within census blocks

# remove missing values from 311 call geometry
sea_311 = sea_311 %>% filter(!is.na(Y_Value))

# convert 311 call data to sf
sea_311  <- st_as_sf(x = sea_311, coords = c(x="X_Value", y="Y_Value"))

# coords are on EPSG:2285 (on NAD83); so set initial coords to that system  
st_crs(sea_311) = 2285

# then transform to EPSG:4326 (on WGS84)
sea_311$geometry = st_transform(sea_311$geometry, "EPSG:4326")

# set crs btw census data and 311 calls
sea_311 = st_transform(sea_311, st_crs(sea_bg))
st_crs(sea_311) = st_crs(sea_bg)

sea_311_merged <- st_join(sea_311,sea_bg,
                          join = st_intersects)

sea_311_merged = sea_311_merged %>% dplyr::select(`Service Request Number`,BLK_CODE, `Request Type`)

# reshape data from long form to wide form with each call code as column
sea_311_merged$val = 1
wide_311_calls = sea_311_merged %>% 
  tidyr::pivot_wider(names_from = "Request Type", values_from = "val", values_fill = 0)

# get all column names
paste(colnames(x = wide_311_calls), collapse = "', '" )

## sum number of each type of call per category by BG and year
wide_311_calls_sum = wide_311_calls %>% dplyr:: group_by(BLK_CODE) %>% 
  summarise(across(c("Street Sign and Traffic Signal Maintenance", 
  "General Inquiry - Public Utilities", "Abandoned Vehicle", "General Inquiry - Customer Service Bureau", 
  "Pothole", "General Inquiry - Police Department", "Traffic Calming", "General Inquiry - Transportation", 
  "Overgrown Vegetation (Transportation)", "Parking Enforcement", "ADA Request (Transportation)", 
  "Damaged Sidewalk", "General Inquiry - City Attorney", "Police Department Records Request", 
  "Safe Routes to School", "Public Utilities Account Question", "Graffiti", "Seattle Center", "KeyArena"), sum)) %>% 
  as.data.frame %>% mutate(geometry = NULL)

# sum total calls per year
wide_311_calls_sum = wide_311_calls_sum %>%
  mutate(total_calls = rowSums(across(2:20)))

# sum top 7 categories 
wide_311_calls_sum$high_SES <- wide_311_calls_sum$`Police Department Records Request` + wide_311_calls_sum$`Traffic Calming` + 
  wide_311_calls_sum$`General Inquiry - Public Utilities` + wide_311_calls_sum$`Damaged Sidewalk` + wide_311_calls_sum$KeyArena + 
  wide_311_calls_sum$`Abandoned Vehicle` + wide_311_calls_sum$`Public Utilities Account Question`

wide_311_calls_sum$high_SES_stan <- (wide_311_calls_sum$high_SES/wide_311_calls_sum$total_calls)


sea_311_final = wide_311_calls_sum

# merge with sea_bg based on BLK_CODE
sea_311_final = sea_311_final %>% dplyr::select(c(BLK_CODE,high_SES_stan))
sea_bg <- merge(sea_bg, sea_311_final, by = "BLK_CODE", all.x = TRUE)

# wombling, generating diff measure between proportion of high SES calls
sea_bg = st_make_valid(sea_bg, dist=0)

uq_bgs = unique(sea_bg$BLK_CODE)

test = poly2nb(sea_bg)
sea_bg$ses_blv = NA

# loop to generate measure

for (i in 1:length(uq_bgs)) {
  
  print(paste0("Iteration ", i))
  
  sea_bg$ses_blv[i] = 
    max(abs(sea_bg[sea_bg$BLK_CODE %in% uq_bgs[i], ]$high_SES_stan - 
              sea_bg[test[[i]], ]$high_SES_stan), na.rm = TRUE)
}

sea_bg$ses_blv = 
  ifelse(sea_bg$ses_blv == Inf | sea_bg$ses_blv == -Inf, NA, sea_bg$ses_blv)

# save chi with boundary measures
save(x = sea_bg, file = "sea_blv.RData")


### data is confidential but providing the code used to aggregate geolocated arrests by census block
### at the end of the code that is commented out, load the dataset with arrests to proceed in the script


## Merge wombling block level data w arrest data
# Load geocoded arrest data
# load("sea_arrests.RData")
# 
# ## find intersection btw census data and arrest data
# st_crs(sea_arrests) = st_crs(sea_bg)
# sea_arrests = st_transform(sea_arrests, st_crs(sea_bg))
# 
# # specifically, find intersection between arrests by category
# 
# sea_ints = st_intersects(sea_arrests, sea_bg)
# # filter by types of arrests
# sea_ints2a = st_intersects(sea_arrests %>% filter(felony == 1), sea_bg)
# sea_ints2b = st_intersects(sea_arrests %>% filter(felony == 0), sea_bg)
# sea_ints2c = st_intersects(sea_arrests %>% filter(Violent == 1), sea_bg)
# sea_ints2d = st_intersects(sea_arrests %>% filter(Violent == 0), sea_bg)
# sea_ints2e = st_intersects(sea_arrests %>% filter(`Crime against society` == 1), sea_bg)
# sea_ints2f = st_intersects(sea_arrests %>% filter(`Crime against person` == 1), sea_bg)
# sea_ints2g = st_intersects(sea_arrests %>% filter(`Crime against Property` == 1), sea_bg)
# # filter by race of arrestee
# sea_ints2h = st_intersects(sea_arrests %>% filter(r_blk == 1), sea_bg)
# sea_ints2i = st_intersects(sea_arrests %>% filter(r_hisp == 1), sea_bg)
# sea_ints2j = st_intersects(sea_arrests %>% filter(r_wht == 1), sea_bg)
# sea_ints2k = st_intersects(sea_arrests %>% filter(r_oth == 1), sea_bg)
# 
# 
# # now filter by race of arrestee and find intersection
# 
# 
# # sum arrests in total and then by each type
# theout = sea_bg[sea_ints %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>% 
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(total_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2a = sea_bg[sea_ints2a %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(felony_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2b = sea_bg[sea_ints2b %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(misdemeanor_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2c = sea_bg[sea_ints2c %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(violent_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# 
# theout2d = sea_bg[sea_ints2d %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(nonviolent_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2e = sea_bg[sea_ints2e %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(society_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2f = sea_bg[sea_ints2f %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(person_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2g = sea_bg[sea_ints2g %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(property_arrests = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2h = sea_bg[sea_ints2h %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(arrests_black = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2i = sea_bg[sea_ints2i %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(arrests_hisp = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2j = sea_bg[sea_ints2j %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(arrests_white = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# theout2k = sea_bg[sea_ints2k %>% unlist, ] %>% 
#   dplyr::select(BLK_CODE) %>%
#   mutate(count = 1) %>% 
#   group_by(BLK_CODE) %>% 
#   summarize(arrests_other = sum(count, na.rm = TRUE)) %>% 
#   as.data.frame %>% 
#   mutate(geometry = NULL)
# 
# 
# sea_bg2 = merge(sea_bg, theout, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2a, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2b, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2c, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2d, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2e, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2f, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2g, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2h, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2i, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2j, by = "BLK_CODE", all.x = TRUE)
# sea_bg2 = merge(sea_bg2, theout2k, by = "BLK_CODE", all.x = TRUE)
# 
# 
# 
# # replacing NAs for arrests with 0
# sea_bg2$total_arrests = ifelse(is.na(sea_bg2$total_arrests), 0, sea_bg2$total_arrests)
# sea_bg2$felony_arrests = ifelse(is.na(sea_bg2$felony_arrests), 0, sea_bg2$felony_arrests)
# sea_bg2$misdemeanor_arrests = ifelse(is.na(sea_bg2$misdemeanor_arrests), 0, sea_bg2$misdemeanor_arrests)
# sea_bg2$violent_arrests = ifelse(is.na(sea_bg2$violent_arrests), 0, sea_bg2$violent_arrests)
# sea_bg2$nonviolent_arrests = ifelse(is.na(sea_bg2$nonviolent_arrests), 0, sea_bg2$nonviolent_arrests)
# sea_bg2$society_arrests = ifelse(is.na(sea_bg2$society_arrests), 0, sea_bg2$society_arrests)
# sea_bg2$person_arrests = ifelse(is.na(sea_bg2$person_arrests), 0, sea_bg2$person_arrests)
# sea_bg2$property_arrests = ifelse(is.na(sea_bg2$property_arrests), 0, sea_bg2$property_arrests)
# sea_bg2$arrests_black = ifelse(is.na(sea_bg2$arrests_black), 0, sea_bg2$arrests_black)
# sea_bg2$arrests_hisp = ifelse(is.na(sea_bg2$arrests_hisp), 0, sea_bg2$arrests_hisp)
# sea_bg2$arrests_white = ifelse(is.na(sea_bg2$arrests_white), 0, sea_bg2$arrests_white)
# sea_bg2$arrests_other = ifelse(is.na(sea_bg2$arrests_other), 0, sea_bg2$arrests_other)
# 

### read in dataset with aggregate arrests by type to continue

load("sea_block_arrests.RData")

sea_block_arrests <- sea_bg2




#### incorporating crime data ####

seacrime = read_csv("SPD_Crime_Data__2008-Present.csv")

# so longitude and latitude need to be fixed. 
# filter NAs out of location
seacrime = seacrime %>%
  filter(!is.na(Longitude))

# convert to sf
seacrime  <- st_as_sf(x = seacrime, coords = c("Longitude", "Latitude"))

# cleaning crime data 
# subset by relevant years (Seattle arrests for years 2011-2015)
seacrime = seacrime %>% filter(grepl("2011|2012|2013|2014|2015",`Offense Start DateTime`))

## aggregating crime data by categories
# unique(seacrime$`Offense Parent Group`)
# unique(seacrime$Offense)

# add column classifying crime based on UCR description
seacrime = seacrime %>% 
  mutate(violent_crime = case_when(
    `Offense` == "Rape" | `Offense` == "Sexual Assault With An Object" |
      `Offense` == "Statutory Rape" | `Offense` == "Sodomy" |
      `Offense` == "Kidnapping/Abduction" | `Offense` == "Murder & Nonnegligent Manslaughter" |
      `Offense` == "Justifiable Homicide" | `Offense` == "Incest" | `Offense` == "Negligent Manslaughter" |
      `Offense` == "Simple Assault" |
      `Offense` == "Aggravated Assault" ~ 1,))


# generating 2 categories
# 1) violent crime 
# 2) property crime

seacrime$crime_property <- ifelse(seacrime$`Crime Against Category` == "PROPERTY", 1, 0)
seacrime$crime_violent <- ifelse(seacrime$violent_crime == 1, 1, 0)

seacrime = seacrime %>% 
  dplyr::select(crime_violent, crime_property, geometry)

# figuring out intersection 
# set CRS system
seacrime  <- st_set_crs(seacrime, 4326)
seacrime <- st_transform(seacrime, crs = 4326)
sea_bg2 <- st_transform(sea_bg2, crs = 4326)


sea_ints3a = st_intersects(seacrime %>% filter(crime_violent == 1), sea_bg2)
sea_ints3b = st_intersects(seacrime %>% filter(crime_property == 1), sea_bg2)

theout2a = sea_bg2[sea_ints3a %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(crime_violent = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

theout2b = sea_bg2[sea_ints3b %>% unlist, ] %>% 
  dplyr::select(BLK_CODE) %>%
  mutate(count = 1) %>% 
  group_by(BLK_CODE) %>% 
  summarize(crime_property = sum(count, na.rm = TRUE)) %>% 
  as.data.frame %>% 
  mutate(geometry = NULL)

sea_bg3 = merge(sea_bg2, theout2a, by = "BLK_CODE", all.x = TRUE)
sea_bg3 = merge(sea_bg3, theout2b, by = "BLK_CODE", all.x = TRUE)

sea_bg3$crime_violent = ifelse(is.na(sea_bg3$crime_violent), 0, sea_bg3$crime_violent)
sea_bg3$crime_property = ifelse(is.na(sea_bg3$crime_property), 0, sea_bg3$crime_property)

sea_bg3$crime_all = 
  sea_bg3$crime_violent + sea_bg3$crime_property


# add in com_density and phys_bound vars
load("sea_fin_pb3.RData")

# get indicator of physical boundaries by block
sea_phys = sea_fin_pb3 %>% dplyr::select(c(BLK_CODE, phys_bound))
sea_phys = sea_phys %>% as.data.frame() %>% dplyr::select(-c(geometry))

sea_bg3 <- merge(sea_bg3, sea_phys, by = "BLK_CODE")

# integrate commercial density
load("sea_cd.RData")
head(sea_cd)

# Calculate the average total jobs by census block
avg_total_jobs <- sea_cd %>%
  group_by(BLK_CODE) %>%
  summarise(total_jobs = mean(total_jobs, na.rm = TRUE))

sea_bg3 <- merge(sea_bg3, avg_total_jobs, by = "BLK_CODE")

# create com_density measure

sea_bg3$com_density <- sea_bg3$total_jobs/sea_bg3$pop
summary(sea_bg3$com_density)

#### quick edits before saving #### 

# resolving infinite numbers 

sea_bg3 = 
  sea_bg3 %>% 
  mutate(p_race_black_blv = ifelse(p_race_black_blv == Inf | p_race_black_blv == -Inf, NA, p_race_black_blv),
         p_race_hisp_blv = ifelse(p_race_hisp_blv == Inf | p_race_hisp_blv == -Inf, NA, p_race_hisp_blv),
         p_race_asian_blv = ifelse(p_race_asian_blv == Inf | p_race_asian_blv == -Inf, NA, p_race_asian_blv),
         p_race_white_blv = ifelse(p_race_white_blv == Inf | p_race_white_blv == -Inf, NA, p_race_white_blv))

# global measure

sea_bg3$edge_race_areal = 
  sea_bg3[, c("p_race_black_blv", "p_race_hisp_blv",
              "p_race_asian_blv", "p_race_white_blv")] %>% 
  mutate(geometry = NULL) %>% 
  as.data.frame %>% 
  as.matrix %>% 
  matrixStats::rowMaxs(x = .)

# pairwise measure 

sea_bg3 = 
  sea_bg3 %>% 
  mutate(edge_race_wb = p_race_black_blv * p_race_white_blv,
         edge_race_wh = p_race_hisp_blv * p_race_white_blv,
         edge_race_hb = p_race_hisp_blv * p_race_black_blv)

# arrest rate

sea_bg3$arrest_rate = (sea_bg3$total_arrests / sea_bg3$pop) * 10
sea_bg3$arrest_rate = 
  ifelse(sea_bg3$arrest_rate == Inf | sea_bg3$arrest_rate == -Inf, NA,
         sea_bg3$arrest_rate)

sea_bg3$lpop = log(sea_bg3$pop)

sea_bg3$felony_arrest_rate = (sea_bg3$felony_arrests / sea_bg3$pop) * 10
sea_bg3$felony_arrest_rate = 
  ifelse(sea_bg3$felony_arrest_rate == Inf | sea_bg3$felony_arrest_rate == -Inf, NA,
         sea_bg3$felony_arrest_rate)

sea_bg3$misdemeanor_arrest_rate = (sea_bg3$misdemeanor_arrests / sea_bg3$pop) * 10
sea_bg3$misdemeanor_arrest_rate = 
  ifelse(sea_bg3$misdemeanor_arrest_rate == Inf | sea_bg3$misdemeanor_arrest_rate == -Inf, NA,
         sea_bg3$misdemeanor_arrest_rate)

sea_bg3$violent_arrest_rate = (sea_bg3$violent_arrests / sea_bg3$pop) * 10
sea_bg3$violent_arrest_rate = 
  ifelse(sea_bg3$violent_arrest_rate == Inf | sea_bg3$violent_arrest_rate == -Inf, NA,
         sea_bg3$violent_arrest_rate)

sea_bg3$nonviolent_arrest_rate = (sea_bg3$nonviolent_arrests / sea_bg3$pop) * 10
sea_bg3$nonviolent_arrest_rate = 
  ifelse(sea_bg3$nonviolent_arrest_rate == Inf | sea_bg3$nonviolent_arrest_rate == -Inf, NA,
         sea_bg3$nonviolent_arrest_rate)

sea_bg3$society_arrest_rate = (sea_bg3$society_arrests / sea_bg3$pop) * 10
sea_bg3$society_arrest_rate = 
  ifelse(sea_bg3$society_arrest_rate == Inf | sea_bg3$society_arrest_rate == -Inf, NA,
         sea_bg3$society_arrest_rate)

sea_bg3$person_arrest_rate = (sea_bg3$person_arrests / sea_bg3$pop) * 10
sea_bg3$person_arrest_rate = 
  ifelse(sea_bg3$person_arrest_rate == Inf | sea_bg3$person_arrest_rate == -Inf, NA,
         sea_bg3$person_arrest_rate)

sea_bg3$property_arrest_rate = (sea_bg3$property_arrests / sea_bg3$pop) * 10
sea_bg3$property_arrest_rate = 
  ifelse(sea_bg3$property_arrest_rate == Inf | sea_bg3$property_arrest_rate == -Inf, NA,
         sea_bg3$property_arrest_rate)

# percent non-white
sea_bg3$p_race_nonwhite <- sea_bg3$p_race_black + sea_bg3$p_race_hisp + sea_bg3$p_race_asian + sea_bg3$p_race_other

# remove blocks with 0 pop
sea_bg3 <- subset(sea_bg3, sea_bg3$pop > 0)

#### saving #### 

sea_fin = sea_bg3
save(x = sea_fin, file = "sea_final.RData")



